The dataset used in this report was collected on the 25th of January 2024 from The Data Worldbank. The data falls under the License CC-BY 4.0, which allows public use.
Being interested in the environmental state of the world, I wanted to know what is happening to our forests. As we likely all know, global warming is threatening our world, and to make an impact on reversing and preserving our planet, I think it’s important that people can visualize the impact of our actions, rather than hear about this. That is the goal of this project.
To do this I’ve asked the following questions:
Which 5 countries have suffered the highest loss in forest area?
Which countries have gained the most forest area over time?
Are there countries that have increased in forest area during this time?
Per year, is there a loss, neutrality, or increase in forest area globally?
Create an in-memory database and import libraries.
Locate dataset and import libraries.
Code
database_loc =R"C:\Users\ERMCl\Documents\FreelanceWork\Portfolio\Forest_Area_Internationally_2024.0125\API_6_DS2_en_csv_v2_6303688.csv"import duckdbconnection = duckdb.connect(database=':memory:')import pandas as pdimport geopandas as gpdimport plotly.graph_objects as goimport plotly.express as pxcreate_table_total_total_total_total_total_total_query ="""DROP TABLE ForestDB;P"""
Copy paste code output to help create the database.
Code
# x = 1993# while x < 2023:# print(f"CONCAT(y{x}, '%') AS y{x},"),# x += 1# print("done")# while x < 2023:# print(f"y{x} INT64,"),# x += 1# print("done")# while x < 2023:# print(f"y{x} = CONCAT(y{x}, '%'),"),# x += 1# print("done")
Create in-memory database from the dataset to allow faster data proccessing.
Query 1. Which 5 countries have suffered the highest loss in forest area?
Run a query to calculate the change in forest area and order the results in ascending order. Limit output to first 5 rows.
Code
query ="""SELECT DISTINCT Country_Name AS 'Country Name', Indicator_Name AS 'Indicator Name', y1993, y1994, y1995, y1996, y1997, y1998, y1999, y2000, y2001, y2002, y2003, y2004, y2005, y2006, y2007, y2008, y2009, y2010, y2011, y2012, y2013, y2014, y2015, y2016, y2017, y2018, y2019, y2020, y2021, (y2021 - y1993) AS 'Total % Forest Area Gained or Lost (1993-2021)'FROM ForestDBWHERE Indicator_Name ='Forest area (% of land area)'ORDER BY (y2021 - y1993) ASCLIMIT 5;"""table = connection.execute(query).df()
Reorganize the table.
Code
table_melted = table.melt(id_vars=['Country Name', 'Indicator Name', 'Total % Forest Area Gained or Lost (1993-2021)'], var_name='Year', value_name='% Forest Area')table_melted['Year'] = table_melted['Year'].str.extract('(\d+)', expand=False).astype(int)table_melted.tail(5)
Country Name
Indicator Name
Total % Forest Area Gained or Lost (1993-2021)
Year
% Forest Area
140
Nicaragua
Forest area (% of land area)
-24
2021
27
141
Paraguay
Forest area (% of land area)
-22
2021
40
142
Northern Mariana Islands
Forest area (% of land area)
-19
2021
53
143
Cambodia
Forest area (% of land area)
-17
2021
45
144
Gambia, The
Forest area (% of land area)
-16
2021
23
Visualization of the reorganized table.
Code
# Create the plotfig = px.line(table_melted, x='Year', y='% Forest Area', color='Country Name', line_shape='linear', hover_name="Country Name", custom_data=['Total % Forest Area Gained or Lost (1993-2021)'] )# Gray out all linesfig.update_traces(line=dict(color='lightgray'))# Define Country with the highest total loss of Forest Area from 1993 to 2021highest_loss_country = table_melted.loc[table_melted['Total % Forest Area Gained or Lost (1993-2021)'].idxmin()]['Country Name']# Add annotations to the country with the highest Forest lossname_data = table_melted[table_melted['Country Name'] == highest_loss_country].iloc[10]fig.add_annotation(x=name_data['Year'], y=name_data['% Forest Area'], text=f"<b>{highest_loss_country}<b>", showarrow=False, yshift=20, font=dict(color='white'))loss_data_point = table_melted[(table_melted['Country Name'] == highest_loss_country) & (table_melted['Year'] == table_melted['Year'].max())]fig.add_annotation(x=loss_data_point['Year'].values[0], y=loss_data_point['% Forest Area'].values[0], text=f"<b>Total % Lost: {loss_data_point['Total % Forest Area Gained or Lost (1993-2021)'].values[0]}%<b>", showarrow=False, yshift=25, font=dict(color='white'))# Edit hover ---> Research how to edit hover!!! current hover is not correcthover_template ="<b>%{hovertext}</b><br>" \"Year: %{x}<br>" \"Forest Area: %{y}% <br>" \"<i>Total Area Loss: %{customdata}% <i><extra></extra>"# " <br>" \ -> for added enterfig.update_traces(hovertemplate=hover_template)# Edit Layout of Graphfig.update_traces(hoverlabel=dict(font=dict(color='black'), bgcolor='lightgray')) #hoverdatafig.update_traces(selector={'name': highest_loss_country}, line=dict(color='red')) #selector hover data#fig.update_traces(hoverlabel=dict(font=dict(color='rgb(102,0,0)')), selector={'name': highest_loss_country})fig.update_traces(hoverlabel=dict(font=dict(color='white'), bgcolor='rgb(128,0,0)'), selector={'name': highest_loss_country})fig.update_xaxes(showgrid=False, range=[1993, 2021])fig.update_yaxes(showgrid=False, range=[0, 100])fig.update_layout(showlegend=False, title_x=0.5, title=dict(text='<b>The Country With The Highest Loss of Forest Area From 1993 to 2021<b>', font=dict(size=18)), paper_bgcolor='rgb(17,17,17)', plot_bgcolor='rgb(17,17,17)', font=dict(color='white'))fig.update_layout(width=790, height=500, margin=dict(l=100, r=100, b=100, t=100))#Call for graphfig.show()
Visualization with added animated time-line.
Code
# Static Graphfig = px.line(table_melted, x='Year', y='% Forest Area', color='Country Name', line_shape='linear', hover_name="Country Name", custom_data=['Total % Forest Area Gained or Lost (1993-2021)'] )fig.update_traces(line=dict(color='lightgray'))highest_loss_country = table_melted.loc[table_melted['Total % Forest Area Gained or Lost (1993-2021)'].idxmin()]['Country Name']name_data = table_melted[table_melted['Country Name'] == highest_loss_country].iloc[10]fig.add_annotation(x=name_data['Year'], y=name_data['% Forest Area'], text=f"<b>{highest_loss_country}<b>", showarrow=False, yshift=20, font=dict(color='white'))loss_data_point = table_melted[(table_melted['Country Name'] == highest_loss_country) & (table_melted['Year'] == table_melted['Year'].max())]fig.add_annotation(x=loss_data_point['Year'].values[0], y=loss_data_point['% Forest Area'].values[0], text=f"<b>Total % Lost: {loss_data_point['Total % Forest Area Gained or Lost (1993-2021)'].values[0]}%<b>", showarrow=False, yshift=25, font=dict(color='white'))hover_template ="<b>%{hovertext}</b><br>" \"Year: %{x}<br>" \"Forest Area: %{y}% <br>" \"<i>Total Area Loss: %{customdata}% <i><extra></extra>"fig.update_traces(hovertemplate=hover_template)fig.update_traces(hoverlabel=dict(font=dict(color='black'), bgcolor='lightgray'))fig.update_traces(selector={'name': highest_loss_country}, line=dict(color='red'))fig.update_traces(hoverlabel=dict(font=dict(color='white'), bgcolor='rgb(128,0,0)'), selector={'name': highest_loss_country})fig.update_xaxes(showgrid=False, range=[1993, 2021])fig.update_yaxes(showgrid=False, range=[0, 100])fig.update_layout(showlegend=False, title_x=0.5, title=dict(text='<b>The Country With The Highest Loss of Forest Area From 1993 to 2021<b>', font=dict(size=18)), paper_bgcolor='rgb(17,17,17)', plot_bgcolor='rgb(17,17,17)', font=dict(color='white'))fig.update_layout(width=790, height=500, margin=dict(l=100, r=100, b=100, t=100))# Add frames for animationframes = []for year insorted(table_melted['Year'].unique()): frame_data = table_melted[table_melted['Year'] <= year] frame_traces = [ go.Scatter( x=frame_data[frame_data['Country Name'] == country]['Year'], y=frame_data[frame_data['Country Name'] == country]['% Forest Area'], mode='lines', line=dict(color='red'if country == highest_loss_country else'gray'), name=country ) for country in frame_data['Country Name'].unique() ] frame_traces.append(go.Scatter( x=[max(frame_data['Year'])], y=[max(frame_data['% Forest Area'])], text=[str(year)], mode="text", showlegend=False )) frame = go.Frame(data=frame_traces, name=str(year)) frames.append(frame)fig.frames = frames# Add animation controlsfig.update_layout( updatemenus=[{'type': 'buttons','buttons': [{'label': 'Play','method': 'animate','args': [None, {'frame': {'duration': 300, 'redraw': True}, 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'linear'}}] }],'direction': 'left','showactive': False,'x': -0.05,'xanchor': 'right','y': 0,'yanchor': 'top' }], sliders=[{'steps': [{'method': 'animate', 'args': [[f'{year}'], {'frame': {'duration': 400, 'redraw': True}, 'mode': 'immediate','transition': {'duration': 200}}],'label': str(year)} for year insorted(table_melted['Year'].unique())] }])# Show the plotfig.show()
Query 2. Which countries have gained the most forest area over time?
Run query for full table.
Code
#Create database with only Indicator_Name ='Forest area (% of land area)'query2 ="""SELECT DISTINCT Country_Name AS 'Country Name', Indicator_Name AS 'Indicator Name', y1993, y1994, y1995, y1996, y1997, y1998, y1999, y2000, y2001, y2002, y2003, y2004, y2005, y2006, y2007, y2008, y2009, y2010, y2011, y2012, y2013, y2014, y2015, y2016, y2017, y2018, y2019, y2020, y2021, (y2021 - y1993) AS 'Total % Forest Area Gained or Lost (1993-2021)'FROM ForestDBWHERE Indicator_Name ='Forest area (% of land area)'ORDER BY (y2021 - y1993) DESC"""FullTable = connection.execute(query2).df()
Reorganize the table.
Code
table_melted2 = FullTable.melt(id_vars=['Country Name', 'Indicator Name', 'Total % Forest Area Gained or Lost (1993-2021)'], var_name='Year', value_name='% Forest Area')table_melted2['Year'] = table_melted2['Year'].str.extract('(\d+)', expand=False).astype(int)table_melted2
Country Name
Indicator Name
Total % Forest Area Gained or Lost (1993-2021)
Year
% Forest Area
0
Bhutan
Forest area (% of land area)
18.0
1993
54.0
1
Viet Nam
Forest area (% of land area)
16.0
1993
31.0
2
Puerto Rico
Forest area (% of land area)
16.0
1993
40.0
3
Cuba
Forest area (% of land area)
11.0
1993
20.0
4
Fiji
Forest area (% of land area)
10.0
1993
53.0
...
...
...
...
...
...
7709
St. Martin (French part)
Forest area (% of land area)
NaN
2021
25.0
7710
Serbia
Forest area (% of land area)
NaN
2021
32.0
7711
Montenegro
Forest area (% of land area)
NaN
2021
61.0
7712
South Sudan
Forest area (% of land area)
NaN
2021
11.0
7713
Hong Kong SAR, China
Forest area (% of land area)
NaN
2021
NaN
7714 rows × 5 columns
Visualization
Code
# Load world map GeoJSON fileworld_map = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres')) #evertually change with the downloaded document#world_map['geometry'] = world_map['geometry'].simplify(tolerance=0.01) #Adding this line makes running the code a bit faster# Update the country names to match the one in world_maptable_melted2.loc[table_melted2['Country Name'] =='United States', 'Country Name'] ='United States of America'table_melted2.loc[table_melted2['Country Name'] =='Congo, Rep.', 'Country Name'] ='Congo'table_melted2.loc[table_melted2['Country Name'] =='Congo, Dem. Rep.', 'Country Name'] ='Dem. Rep. Congo'table_melted2.loc[table_melted2['Country Name'] =='Egypt, Arab Rep.', 'Country Name'] ='Egypt'table_melted2.loc[table_melted2['Country Name'] =='Yemen, Rep.', 'Country Name'] ='Yemen'table_melted2.loc[table_melted2['Country Name'] =='South Sudan', 'Country Name'] ='S. Sudan'table_melted2.loc[table_melted2['Country Name'] =='Russian Federation', 'Country Name'] ='Russia'table_melted2.loc[table_melted2['Country Name'] =='Venezuela, RB', 'Country Name'] ='Venezuela'table_melted2.loc[table_melted2['Country Name'] =='Central African Republic', 'Country Name'] ='Central African Rep.'table_melted2.loc[table_melted2['Country Name'] =='Dominican Republic', 'Country Name'] ='Dominican Rep.'table_melted2.loc[table_melted2['Country Name'] =='Bahamas, The', 'Country Name'] ='Bahamas'table_melted2.loc[table_melted2['Country Name'] =="Cote d'Ivoire", 'Country Name'] ="Côte d'Ivoire"table_melted2.loc[table_melted2['Country Name'] =='Iran, Islamic Rep.', 'Country Name'] ='Iran'table_melted2.loc[table_melted2['Country Name'] =='Syrian Arab Republic', 'Country Name'] ='Syria'table_melted2.loc[table_melted2['Country Name'] =='Turkiye', 'Country Name'] ='Turkey'table_melted2.loc[table_melted2['Country Name'] =='Viet Nam', 'Country Name'] ='Vietnam'table_melted2.loc[table_melted2['Country Name'] =='Lao PDR', 'Country Name'] ='Laos'table_melted2.loc[table_melted2['Country Name'] =="Korea, Dem. People's Rep.", 'Country Name'] ='North Korea'table_melted2.loc[table_melted2['Country Name'] =='Korea, Rep.', 'Country Name'] ='South Korea'table_melted2.loc[table_melted2['Country Name'] =='Solomon Islands', 'Country Name'] ='Solomon Is.'table_melted2.loc[table_melted2['Country Name'] =='S. Sudan', 'Country Name'] ='S. Sudan'#Missing Countries: S.Sudan and Côte d'Ivoire (present, but cannot add), Taiwan, Falkland Is., Antartica not in the table.merged_data = world_map.merge(table_melted2, left_on='name', right_on='Country Name', how='left')fig = px.choropleth(merged_data, geojson=merged_data.geometry, locations=merged_data.index, color='Total % Forest Area Gained or Lost (1993-2021)', hover_name='name', hover_data=['Total % Forest Area Gained or Lost (1993-2021)'], color_continuous_scale='Blackbody', #Inferno, Hot or Blackbody projection='natural earth')fig.update_geos(showcountries=True, countrycolor="lightgray", showcoastlines=True, coastlinecolor="white")fig.update_layout(title='<b> Global Changes in Forest Area: From 1993 to 2021 <b>', title_x=.5, title_y=0.95, font=dict(size=15), )fig.update_layout( coloraxis_colorbar=dict(title='<b>Forest Area Change<b>', thickness=20,len=0.6, titlefont=dict(size=14), tickfont=dict(size=12), tickvals=[-20,-15, -10, -5, 0, 5, 10, 15], ticktext=['-20%','-15%', '-10%', '-5%', '0%', '5%', '10%', '15%'], ))fig.update_layout(margin={"r":0,"t":0,"l":30,"b":0}, width =790, height =450)fig.update_geos(projection_scale=1.1)hover_template ="<b>%{hovertext}</b><br>"+\"%{customdata}% Forest Area Loss/Gain <br>"+\"<extra></extra>"fig.update_traces(hovertemplate=hover_template)fig.show()
C:\Users\ERMCl\AppData\Local\Temp\ipykernel_85236\1706595822.py:2: FutureWarning:
The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.